library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the ODI career batting-and-fielding table from the local CSV export.
odiBattingFielding <- read.csv(
  file = "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Statistics\\odicareerbattingandfielding.csv"
)
# Work only with the first eleven columns of the raw table.
df <- odiBattingFielding[, seq_len(11)]
# Preview the leading rows of the trimmed table.
head(df)
## Name Matches Innings Not_Outs Runs High_Score
## 1 Aaron James Finch 59 57 1 2169 148
## 2 Aavishkar Madhav Salvi 4 3 1 4 4
## 3 Abhimanyu Mithun 5 3 0 51 24
## 4 Abhishek Mohan Nayar 3 1 1 0 0
## 5 Abraham Benjamin de Villiers 197 189 34 8524 162
## 6 Adam Charles Voges 31 28 9 870 112
## Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken
## 1 38.73 7 11 86.96 30
## 2 2.00 0 0 28.57 2
## 3 17.00 0 0 92.72 1
## 4 NA 0 0 0.00 0
## 5 54.99 24 47 99.96 161
## 6 45.78 1 4 87.17 7
# Discard every row that has a missing value in any column, then preview the
# rows that remain.
df <- stats::na.omit(df)
utils::head(df)
## Name Matches Innings Not_Outs Runs High_Score
## 1 Aaron James Finch 59 57 1 2169 148
## 2 Aavishkar Madhav Salvi 4 3 1 4 4
## 3 Abhimanyu Mithun 5 3 0 51 24
## 5 Abraham Benjamin de Villiers 197 189 34 8524 162
## 6 Adam Charles Voges 31 28 9 870 112
## 7 Adam Craig Gilchrist 286 278 11 9595 172
## Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken
## 1 38.73 7 11 86.96 30
## 2 2.00 0 0 28.57 2
## 3 17.00 0 0 92.72 1
## 5 54.99 24 47 99.96 161
## 6 45.78 1 4 87.17 7
## 7 35.93 16 55 96.89 416
# Show per-column summary statistics for the cleaned table.
base::summary(object = df)
## Name Matches Innings
## Aaron James Finch : 1 Min. : 1.00 Min. : 1.00
## Aavishkar Madhav Salvi : 1 1st Qu.: 25.00 1st Qu.: 14.00
## Abhimanyu Mithun : 1 Median : 68.00 Median : 42.00
## Abraham Benjamin de Villiers: 1 Mean : 99.22 Mean : 78.05
## Adam Charles Voges : 1 3rd Qu.:153.00 3rd Qu.:114.00
## Adam Craig Gilchrist : 1 Max. :463.00 Max. :452.00
## (Other) :245
## Not_Outs Runs High_Score Average
## Min. : 0.00 Min. : 0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 123 1st Qu.: 30.50 1st Qu.:11.82
## Median : 9.00 Median : 737 Median : 72.00 Median :23.39
## Mean :14.26 Mean : 2065 Mean : 79.96 Mean :23.94
## 3rd Qu.:21.50 3rd Qu.: 2546 3rd Qu.:123.00 3rd Qu.:34.30
## Max. :72.00 Max. :18426 Max. :264.00 Max. :90.50
##
## No_Of_100 No_Of_50 Strike_Rate Catches_Taken
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 71.22 1st Qu.: 7.0
## Median : 0.00 Median : 2.00 Median : 80.64 Median : 19.0
## Mean : 2.96 Mean :11.33 Mean : 79.30 Mean : 41.1
## 3rd Qu.: 3.00 3rd Qu.:14.00 3rd Qu.: 89.70 3rd Qu.: 49.5
## Max. :49.00 Max. :96.00 Max. :157.89 Max. :416.0
##
# Fix the RNG seed so the randomly chosen starting centres used by the later
# kmeans() calls are repeatable from run to run.
set.seed(20)
# Clustering input 1: innings played and strike rate (columns 3 and 10 of df).
df1 <- select(df, Innings, Strike_Rate)
# Clustering input 2: innings, not-outs and centuries (columns 3, 4 and 8 of df).
df2 <- select(df, Innings, Not_Outs, No_Of_100)
# Partition the players into five k-means clusters on Innings and Strike_Rate.
OBFCluster <- kmeans(df1, centers = 5)
# Keep the assignment as a factor so the plots treat it as a discrete grouping
# rather than a continuous colour scale.
OBFCluster$cluster <- as.factor(OBFCluster$cluster)
# Static scatter of the clusters. The assignment is bound to the plotting data
# as a real column so aes() does not reach into another object with `$`, and
# the legend gets a readable title instead of the raw expression.
# NOTE(review): k-means numbers its clusters arbitrarily, so the quality labels
# below are tied to whatever numbering this seed happens to produce -- confirm
# they still match the clusters after any change to the data, seed or R version.
ggplot(
  cbind(df1, Cluster = OBFCluster$cluster),
  aes(Innings, Strike_Rate, color = Cluster)
) +
  geom_point(size = 2) +
  scale_color_hue(
    labels = c(
      "Best players", "Bad Players", "Good players", "Useless", "Worst Players"
    )
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # The x-axis is Innings, not Matches, so the title names the plotted variable.
  ggtitle("ODI Career Innings vs Strike Rate")

# Interactive version of the Innings vs Strike_Rate cluster scatter; hovering a
# marker shows the player's name. df1 has no Name column, so the names are read
# from df, which holds the same rows in the same order.
p <- plot_ly(
  df1,
  x = ~Innings, y = ~Strike_Rate,
  type = "scatter", mode = "markers",
  color = OBFCluster$cluster,
  text = ~paste('Name: ', df$Name)
) %>%
  # Title names the variables actually plotted; no batting average is shown.
  layout(title = "Cluster of Innings vs Strike Rate (batsmen)")
p
# Second clustering: five k-means groups on Innings, Not_Outs and No_Of_100.
OBFCluster2 <- kmeans(df2, centers = 5)
# Keep the assignment as a factor so the plots colour by discrete group.
OBFCluster2$cluster <- as.factor(OBFCluster2$cluster)
# Static scatter of Innings against centuries, coloured by cluster. The plot is
# drawn from df2 (the data that was clustered) with the assignment attached as
# a column, so aes() refers only to columns of its own data and the legend is
# titled "Cluster" instead of the raw expression.
ggplot(
  cbind(df2, Cluster = OBFCluster2$cluster),
  aes(Innings, No_Of_100, color = Cluster)
) +
  geom_point(size = 2) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Players with more notouts and centuries")

# Interactive counterpart of the Innings vs No_Of_100 cluster scatter. Hover
# text carries the player name, read from df because df2 has no Name column.
q <- plotly::layout(
  plot_ly(
    df2,
    x = ~Innings,
    y = ~No_Of_100,
    type = "scatter",
    mode = "markers",
    color = OBFCluster2$cluster,
    text = ~paste("Name: ", df$Name)
  ),
  title = "Cluster of notouts and centuries"
)
q